{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 读取数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "      <th>...</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>601758</td>\n",
       "      <td>65532</td>\n",
       "      <td>78990</td>\n",
       "      <td>84.38</td>\n",
       "      <td>136</td>\n",
       "      <td>20490</td>\n",
       "      <td>45</td>\n",
       "      <td>1981</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>519488</td>\n",
       "      <td>56759</td>\n",
       "      <td>65325</td>\n",
       "      <td>89.55</td>\n",
       "      <td>61</td>\n",
       "      <td>22778</td>\n",
       "      <td>86</td>\n",
       "      <td>1967</td>\n",
       "      <td>2018</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>2054139</td>\n",
       "      <td>2036500</td>\n",
       "      <td>2036500</td>\n",
       "      <td>34455</td>\n",
       "      <td>0.99</td>\n",
       "      <td>59</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>447579</td>\n",
       "      <td>58413</td>\n",
       "      <td>67960</td>\n",
       "      <td>89.02</td>\n",
       "      <td>5</td>\n",
       "      <td>15663</td>\n",
       "      <td>86</td>\n",
       "      <td>1977</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>648134</td>\n",
       "      <td>72317</td>\n",
       "      <td>99750</td>\n",
       "      <td>73.68</td>\n",
       "      <td>76</td>\n",
       "      <td>17242</td>\n",
       "      <td>48</td>\n",
       "      <td>1995</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>13813</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>13814.00</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>458210</td>\n",
       "      <td>50078</td>\n",
       "      <td>65450</td>\n",
       "      <td>79.45</td>\n",
       "      <td>146</td>\n",
       "      <td>14181</td>\n",
       "      <td>45</td>\n",
       "      <td>1974</td>\n",
       "      <td>2018</td>\n",
       "      <td>17</td>\n",
       "      <td>...</td>\n",
       "      <td>467161</td>\n",
       "      <td>550000</td>\n",
       "      <td>550000</td>\n",
       "      <td>12863</td>\n",
       "      <td>1.18</td>\n",
       "      <td>42</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.06</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     客户编号   已发货款   资产成本  贷款与资产比列   品牌  骑车销售商  车厂  出生日期  货款日期  地区  ...  \\\n",
       "0  601758  65532  78990    84.38  136  20490  45  1981  2018   8  ...   \n",
       "1  519488  56759  65325    89.55   61  22778  86  1967  2018   6  ...   \n",
       "2  447579  58413  67960    89.02    5  15663  86  1977  2018   9  ...   \n",
       "3  648134  72317  99750    73.68   76  17242  48  1995  2018   8  ...   \n",
       "4  458210  50078  65450    79.45  146  14181  45  1974  2018  17  ...   \n",
       "\n",
       "   尚未还清有效贷款总额  已批准贷款总额  已发放贷款总额  每月还款总额  贷款与已还贷款比列  主账户还款期数  次账户还款期数  \\\n",
       "0           0        0        0       0       1.00        0        0   \n",
       "1     2054139  2036500  2036500   34455       0.99       59        0   \n",
       "2           0        0        0       0       1.00        0        0   \n",
       "3           0    13813    13813       0   13814.00    13813        0   \n",
       "4      467161   550000   550000   12863       1.18       42        0   \n",
       "\n",
       "   贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  \n",
       "0         1.0            1.00     0  \n",
       "1         1.0            1.33     1  \n",
       "2         1.0            1.00     1  \n",
       "3         1.0            2.00     0  \n",
       "4         1.0            1.06     1  \n",
       "\n",
       "[5 rows x 49 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv(\"./车贷违约预测.csv\",encoding = 'ANSI')\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "查看数据统计描述"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.0</td>\n",
       "      <td>199717.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>535690.886665</td>\n",
       "      <td>54256.272280</td>\n",
       "      <td>7.582391e+04</td>\n",
       "      <td>74.643960</td>\n",
       "      <td>72.698508</td>\n",
       "      <td>19634.049665</td>\n",
       "      <td>69.085766</td>\n",
       "      <td>1983.876921</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>7.245222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>68193.411418</td>\n",
       "      <td>12977.656996</td>\n",
       "      <td>1.892894e+04</td>\n",
       "      <td>11.490485</td>\n",
       "      <td>69.706185</td>\n",
       "      <td>3493.655400</td>\n",
       "      <td>22.128288</td>\n",
       "      <td>9.805565</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.481338</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>417428.000000</td>\n",
       "      <td>13320.000000</td>\n",
       "      <td>3.700000e+04</td>\n",
       "      <td>10.030000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>10524.000000</td>\n",
       "      <td>45.000000</td>\n",
       "      <td>1949.000000</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>476762.000000</td>\n",
       "      <td>46977.000000</td>\n",
       "      <td>6.571400e+04</td>\n",
       "      <td>68.730000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>16505.000000</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>1977.000000</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>4.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>535571.000000</td>\n",
       "      <td>53703.000000</td>\n",
       "      <td>7.092200e+04</td>\n",
       "      <td>76.670000</td>\n",
       "      <td>61.000000</td>\n",
       "      <td>20333.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>1986.000000</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>594571.000000</td>\n",
       "      <td>60247.000000</td>\n",
       "      <td>7.915900e+04</td>\n",
       "      <td>83.590000</td>\n",
       "      <td>130.000000</td>\n",
       "      <td>23000.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>1992.000000</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>10.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>671084.000000</td>\n",
       "      <td>990572.000000</td>\n",
       "      <td>1.628992e+06</td>\n",
       "      <td>95.000000</td>\n",
       "      <td>261.000000</td>\n",
       "      <td>24803.000000</td>\n",
       "      <td>156.000000</td>\n",
       "      <td>2000.000000</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>22.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                客户编号           已发货款          资产成本        贷款与资产比列  \\\n",
       "count  199717.000000  199717.000000  1.997170e+05  199717.000000   \n",
       "mean   535690.886665   54256.272280  7.582391e+04      74.643960   \n",
       "std     68193.411418   12977.656996  1.892894e+04      11.490485   \n",
       "min    417428.000000   13320.000000  3.700000e+04      10.030000   \n",
       "25%    476762.000000   46977.000000  6.571400e+04      68.730000   \n",
       "50%    535571.000000   53703.000000  7.092200e+04      76.670000   \n",
       "75%    594571.000000   60247.000000  7.915900e+04      83.590000   \n",
       "max    671084.000000  990572.000000  1.628992e+06      95.000000   \n",
       "\n",
       "                  品牌          骑车销售商             车厂           出生日期      货款日期  \\\n",
       "count  199717.000000  199717.000000  199717.000000  199717.000000  199717.0   \n",
       "mean       72.698508   19634.049665      69.085766    1983.876921    2018.0   \n",
       "std        69.706185    3493.655400      22.128288       9.805565       0.0   \n",
       "min         1.000000   10524.000000      45.000000    1949.000000    2018.0   \n",
       "25%        14.000000   16505.000000      48.000000    1977.000000    2018.0   \n",
       "50%        61.000000   20333.000000      86.000000    1986.000000    2018.0   \n",
       "75%       130.000000   23000.000000      86.000000    1992.000000    2018.0   \n",
       "max       261.000000   24803.000000     156.000000    2000.000000    2018.0   \n",
       "\n",
       "                  地区  \n",
       "count  199717.000000  \n",
       "mean        7.245222  \n",
       "std         4.481338  \n",
       "min         1.000000  \n",
       "25%         4.000000  \n",
       "50%         6.000000  \n",
       "75%        10.000000  \n",
       "max        22.000000  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 统计类描述\n",
    "data.describe().iloc[:,:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>对接员工编号</th>\n",
       "      <th>是否填写手机号</th>\n",
       "      <th>受否填写身份证</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>信用评分</th>\n",
       "      <th>主账户贷款次数</th>\n",
       "      <th>主账户有效贷款次数</th>\n",
       "      <th>主账户中尚未还清有效贷款</th>\n",
       "      <th>主账户中已批准的贷款</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.0</td>\n",
       "      <td>199717.0</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1547.857919</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.023348</td>\n",
       "      <td>0.002143</td>\n",
       "      <td>291.762544</td>\n",
       "      <td>2.464037</td>\n",
       "      <td>1.048414</td>\n",
       "      <td>1.687286e+05</td>\n",
       "      <td>2.224323e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>974.901476</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.151007</td>\n",
       "      <td>0.046243</td>\n",
       "      <td>339.317591</td>\n",
       "      <td>5.283968</td>\n",
       "      <td>1.951018</td>\n",
       "      <td>9.638043e+05</td>\n",
       "      <td>2.522528e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-6.678296e+06</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>712.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1449.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2357.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>680.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.589900e+04</td>\n",
       "      <td>6.400000e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>3795.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>890.000000</td>\n",
       "      <td>453.000000</td>\n",
       "      <td>144.000000</td>\n",
       "      <td>9.652492e+07</td>\n",
       "      <td>1.000000e+09</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              对接员工编号   是否填写手机号   受否填写身份证        是否出具驾驶证         是否填写护照  \\\n",
       "count  199717.000000  199717.0  199717.0  199717.000000  199717.000000   \n",
       "mean     1547.857919       1.0       1.0       0.023348       0.002143   \n",
       "std       974.901476       0.0       0.0       0.151007       0.046243   \n",
       "min         1.000000       1.0       1.0       0.000000       0.000000   \n",
       "25%       712.000000       1.0       1.0       0.000000       0.000000   \n",
       "50%      1449.000000       1.0       1.0       0.000000       0.000000   \n",
       "75%      2357.000000       1.0       1.0       0.000000       0.000000   \n",
       "max      3795.000000       1.0       1.0       1.000000       1.000000   \n",
       "\n",
       "                信用评分        主账户贷款次数      主账户有效贷款次数  主账户中尚未还清有效贷款    主账户中已批准的贷款  \n",
       "count  199717.000000  199717.000000  199717.000000  1.997170e+05  1.997170e+05  \n",
       "mean      291.762544       2.464037       1.048414  1.687286e+05  2.224323e+05  \n",
       "std       339.317591       5.283968       1.951018  9.638043e+05  2.522528e+06  \n",
       "min         0.000000       0.000000       0.000000 -6.678296e+06  0.000000e+00  \n",
       "25%         0.000000       0.000000       0.000000  0.000000e+00  0.000000e+00  \n",
       "50%        14.000000       1.000000       0.000000  0.000000e+00  0.000000e+00  \n",
       "75%       680.000000       3.000000       1.000000  3.589900e+04  6.400000e+04  \n",
       "max       890.000000     453.000000     144.000000  9.652492e+07  1.000000e+09  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe().iloc[:,10:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>主账户中已发放贷款</th>\n",
       "      <th>次账户贷款次数</th>\n",
       "      <th>次账户有效贷款次数</th>\n",
       "      <th>次账户中尚未还清有效贷款</th>\n",
       "      <th>次账户中已批准贷款</th>\n",
       "      <th>次账户中已发放贷款</th>\n",
       "      <th>主账户每月还款</th>\n",
       "      <th>次账户没用还款</th>\n",
       "      <th>近六个月新贷款次数</th>\n",
       "      <th>近六个月违约次数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.220420e+05</td>\n",
       "      <td>0.059524</td>\n",
       "      <td>0.027689</td>\n",
       "      <td>5.583871e+03</td>\n",
       "      <td>7.490970e+03</td>\n",
       "      <td>7.374478e+03</td>\n",
       "      <td>1.314415e+04</td>\n",
       "      <td>3.013734e+02</td>\n",
       "      <td>0.385070</td>\n",
       "      <td>0.095956</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.525814e+06</td>\n",
       "      <td>0.630648</td>\n",
       "      <td>0.314428</td>\n",
       "      <td>1.686728e+05</td>\n",
       "      <td>1.818362e+05</td>\n",
       "      <td>1.812332e+05</td>\n",
       "      <td>1.524289e+05</td>\n",
       "      <td>1.304531e+04</td>\n",
       "      <td>0.957339</td>\n",
       "      <td>0.380935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-5.746470e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.200000e+04</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000e+03</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000e+09</td>\n",
       "      <td>52.000000</td>\n",
       "      <td>36.000000</td>\n",
       "      <td>3.603285e+07</td>\n",
       "      <td>2.688820e+07</td>\n",
       "      <td>2.688820e+07</td>\n",
       "      <td>2.564281e+07</td>\n",
       "      <td>3.246710e+06</td>\n",
       "      <td>35.000000</td>\n",
       "      <td>20.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          主账户中已发放贷款        次账户贷款次数      次账户有效贷款次数  次账户中尚未还清有效贷款     次账户中已批准贷款  \\\n",
       "count  1.997170e+05  199717.000000  199717.000000  1.997170e+05  1.997170e+05   \n",
       "mean   2.220420e+05       0.059524       0.027689  5.583871e+03  7.490970e+03   \n",
       "std    2.525814e+06       0.630648       0.314428  1.686728e+05  1.818362e+05   \n",
       "min    0.000000e+00       0.000000       0.000000 -5.746470e+05  0.000000e+00   \n",
       "25%    0.000000e+00       0.000000       0.000000  0.000000e+00  0.000000e+00   \n",
       "50%    0.000000e+00       0.000000       0.000000  0.000000e+00  0.000000e+00   \n",
       "75%    6.200000e+04       0.000000       0.000000  0.000000e+00  0.000000e+00   \n",
       "max    1.000000e+09      52.000000      36.000000  3.603285e+07  2.688820e+07   \n",
       "\n",
       "          次账户中已发放贷款       主账户每月还款       次账户没用还款      近六个月新贷款次数       近六个月违约次数  \n",
       "count  1.997170e+05  1.997170e+05  1.997170e+05  199717.000000  199717.000000  \n",
       "mean   7.374478e+03  1.314415e+04  3.013734e+02       0.385070       0.095956  \n",
       "std    1.812332e+05  1.524289e+05  1.304531e+04       0.957339       0.380935  \n",
       "min    0.000000e+00  0.000000e+00  0.000000e+00       0.000000       0.000000  \n",
       "25%    0.000000e+00  0.000000e+00  0.000000e+00       0.000000       0.000000  \n",
       "50%    0.000000e+00  0.000000e+00  0.000000e+00       0.000000       0.000000  \n",
       "75%    0.000000e+00  2.000000e+03  0.000000e+00       0.000000       0.000000  \n",
       "max    2.688820e+07  2.564281e+07  3.246710e+06      35.000000      20.000000  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe().iloc[:,20:30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>平均贷款期限</th>\n",
       "      <th>第一次贷款距今时间</th>\n",
       "      <th>贷款查询次数</th>\n",
       "      <th>是否违约</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>贷款总次数</th>\n",
       "      <th>主账户无效贷款次数</th>\n",
       "      <th>次账户无效贷款次数</th>\n",
       "      <th>无效贷款总次数</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>1.997170e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>8.058107</td>\n",
       "      <td>13.190875</td>\n",
       "      <td>0.203338</td>\n",
       "      <td>0.177391</td>\n",
       "      <td>0.723575</td>\n",
       "      <td>2.523561</td>\n",
       "      <td>1.415623</td>\n",
       "      <td>0.031835</td>\n",
       "      <td>1.447458</td>\n",
       "      <td>1.743125e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>13.860761</td>\n",
       "      <td>21.156865</td>\n",
       "      <td>0.694087</td>\n",
       "      <td>0.382000</td>\n",
       "      <td>0.113613</td>\n",
       "      <td>5.356066</td>\n",
       "      <td>4.038380</td>\n",
       "      <td>0.412795</td>\n",
       "      <td>4.075544</td>\n",
       "      <td>9.813640e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.094638</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-6.678296e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.664431</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.741715</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>13.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.809512</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.818900e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>117.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>28.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.937987</td>\n",
       "      <td>453.000000</td>\n",
       "      <td>451.000000</td>\n",
       "      <td>42.000000</td>\n",
       "      <td>451.000000</td>\n",
       "      <td>9.652492e+07</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              平均贷款期限      第一次贷款距今时间         贷款查询次数           是否违约  \\\n",
       "count  199717.000000  199717.000000  199717.000000  199717.000000   \n",
       "mean        8.058107      13.190875       0.203338       0.177391   \n",
       "std        13.860761      21.156865       0.694087       0.382000   \n",
       "min         0.000000       0.000000       0.000000       0.000000   \n",
       "25%         0.000000       0.000000       0.000000       0.000000   \n",
       "50%         0.000000       0.000000       0.000000       0.000000   \n",
       "75%        13.000000      20.000000       0.000000       0.000000   \n",
       "max       117.000000     117.000000      28.000000       1.000000   \n",
       "\n",
       "              贷款与资产比          贷款总次数      主账户无效贷款次数      次账户无效贷款次数  \\\n",
       "count  199717.000000  199717.000000  199717.000000  199717.000000   \n",
       "mean        0.723575       2.523561       1.415623       0.031835   \n",
       "std         0.113613       5.356066       4.038380       0.412795   \n",
       "min         0.094638       0.000000       0.000000       0.000000   \n",
       "25%         0.664431       0.000000       0.000000       0.000000   \n",
       "50%         0.741715       1.000000       0.000000       0.000000   \n",
       "75%         0.809512       3.000000       1.000000       0.000000   \n",
       "max         0.937987     453.000000     451.000000      42.000000   \n",
       "\n",
       "             无效贷款总次数    尚未还清有效贷款总额  \n",
       "count  199717.000000  1.997170e+05  \n",
       "mean        1.447458  1.743125e+05  \n",
       "std         4.075544  9.813640e+05  \n",
       "min         0.000000 -6.678296e+06  \n",
       "25%         0.000000  0.000000e+00  \n",
       "50%         0.000000  0.000000e+00  \n",
       "75%         1.000000  3.818900e+04  \n",
       "max       451.000000  9.652492e+07  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe().iloc[:,30:40]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>199717.00</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>1.997170e+05</td>\n",
       "      <td>199717.000000</td>\n",
       "      <td>199717.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.299233e+05</td>\n",
       "      <td>2.294165e+05</td>\n",
       "      <td>1.344553e+04</td>\n",
       "      <td>inf</td>\n",
       "      <td>5.059582e+04</td>\n",
       "      <td>2.928000e+03</td>\n",
       "      <td>5.535709e+02</td>\n",
       "      <td>1.438913</td>\n",
       "      <td>0.487475</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.530977e+06</td>\n",
       "      <td>2.534185e+06</td>\n",
       "      <td>1.531618e+05</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.275670e+06</td>\n",
       "      <td>1.065410e+05</td>\n",
       "      <td>1.141343e+05</td>\n",
       "      <td>0.792213</td>\n",
       "      <td>0.561915</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-110000.33</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.720600e+04</td>\n",
       "      <td>6.508500e+04</td>\n",
       "      <td>2.094000e+03</td>\n",
       "      <td>1.26</td>\n",
       "      <td>2.500000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.670000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000e+09</td>\n",
       "      <td>1.000000e+09</td>\n",
       "      <td>2.564281e+07</td>\n",
       "      <td>inf</td>\n",
       "      <td>1.000000e+09</td>\n",
       "      <td>1.980000e+07</td>\n",
       "      <td>5.000000e+07</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            已批准贷款总额       已发放贷款总额        每月还款总额  贷款与已还贷款比列       主账户还款期数  \\\n",
       "count  1.997170e+05  1.997170e+05  1.997170e+05  199717.00  1.997170e+05   \n",
       "mean   2.299233e+05  2.294165e+05  1.344553e+04        inf  5.059582e+04   \n",
       "std    2.530977e+06  2.534185e+06  1.531618e+05        NaN  2.275670e+06   \n",
       "min    0.000000e+00  0.000000e+00  0.000000e+00 -110000.33  0.000000e+00   \n",
       "25%    0.000000e+00  0.000000e+00  0.000000e+00       1.00  0.000000e+00   \n",
       "50%    0.000000e+00  0.000000e+00  0.000000e+00       1.00  0.000000e+00   \n",
       "75%    6.720600e+04  6.508500e+04  2.094000e+03       1.26  2.500000e+01   \n",
       "max    1.000000e+09  1.000000e+09  2.564281e+07        inf  1.000000e+09   \n",
       "\n",
       "            次账户还款期数    贷款与已批准贷款比列  总贷款次数与总有效贷款次数比           工作类型  \n",
       "count  1.997170e+05  1.997170e+05   199717.000000  199717.000000  \n",
       "mean   2.928000e+03  5.535709e+02        1.438913       0.487475  \n",
       "std    1.065410e+05  1.141343e+05        0.792213       0.561915  \n",
       "min    0.000000e+00  0.000000e+00        1.000000       0.000000  \n",
       "25%    0.000000e+00  1.000000e+00        1.000000       0.000000  \n",
       "50%    0.000000e+00  1.000000e+00        1.000000       0.000000  \n",
       "75%    0.000000e+00  1.000000e+00        1.670000       1.000000  \n",
       "max    1.980000e+07  5.000000e+07       18.000000       2.000000  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe().iloc[:,40:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据处理"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 异常值处理 缺失值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "101014"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 数据有效性 没贷款的人没有预测价值 谈不上违约\n",
    "data = data[data['贷款总次数'] != 0]\n",
    "data['贷款总次数'].count()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看有无重复 ： 无\n",
    "data.duplicated().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "# 使用盖帽方法处理极端值\n",
    "\n",
    "def block(x): \n",
    "    qu1 = x.quantile(.9)\n",
    "    qu2 = x.quantile(.1)\n",
    "    out = x.mask(x>qu1,qu1)#  看mask解决这个问题 \n",
    "    out = x.mask(x<qu2,qu2)\n",
    "    return out \n",
    "\n",
    "def block2(df):\n",
    "    df1 = df.copy()\n",
    "    df1['贷款与已还贷款比列']  = block(df1['贷款与已还贷款比列'])\n",
    "    return df1 \n",
    "\n",
    "data_1 = block2(data)\n",
    "data_2 = data_1[(data_1.贷款与已还贷款比列 != data_1.贷款与已还贷款比列.max()) & (data_1.贷款与已还贷款比列 > 0)]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.009940e+05</td>\n",
       "      <td>1.009940e+05</td>\n",
       "      <td>1.009940e+05</td>\n",
       "      <td>1.009940e+05</td>\n",
       "      <td>1.009940e+05</td>\n",
       "      <td>1.009940e+05</td>\n",
       "      <td>1.009940e+05</td>\n",
       "      <td>100994.000000</td>\n",
       "      <td>100994.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>4.546652e+05</td>\n",
       "      <td>4.536636e+05</td>\n",
       "      <td>2.658609e+04</td>\n",
       "      <td>2.722653e+03</td>\n",
       "      <td>1.000454e+05</td>\n",
       "      <td>5.790063e+03</td>\n",
       "      <td>1.092924e+03</td>\n",
       "      <td>1.867826</td>\n",
       "      <td>0.484801</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.544788e+06</td>\n",
       "      <td>3.549381e+06</td>\n",
       "      <td>2.145690e+05</td>\n",
       "      <td>4.187866e+04</td>\n",
       "      <td>3.199375e+06</td>\n",
       "      <td>1.497674e+05</td>\n",
       "      <td>1.604987e+05</td>\n",
       "      <td>0.932107</td>\n",
       "      <td>0.540803</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.500000e+04</td>\n",
       "      <td>1.450000e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>3.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.250000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>6.500000e+04</td>\n",
       "      <td>6.368250e+04</td>\n",
       "      <td>2.035500e+03</td>\n",
       "      <td>1.250000e+00</td>\n",
       "      <td>2.400000e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.670000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>3.240000e+05</td>\n",
       "      <td>3.200000e+05</td>\n",
       "      <td>8.604000e+03</td>\n",
       "      <td>2.030000e+00</td>\n",
       "      <td>1.370000e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000e+09</td>\n",
       "      <td>1.000000e+09</td>\n",
       "      <td>2.564281e+07</td>\n",
       "      <td>5.000001e+06</td>\n",
       "      <td>1.000000e+09</td>\n",
       "      <td>1.980000e+07</td>\n",
       "      <td>5.000000e+07</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            已批准贷款总额       已发放贷款总额        每月还款总额     贷款与已还贷款比列       主账户还款期数  \\\n",
       "count  1.009940e+05  1.009940e+05  1.009940e+05  1.009940e+05  1.009940e+05   \n",
       "mean   4.546652e+05  4.536636e+05  2.658609e+04  2.722653e+03  1.000454e+05   \n",
       "std    3.544788e+06  3.549381e+06  2.145690e+05  4.187866e+04  3.199375e+06   \n",
       "min    0.000000e+00  0.000000e+00  0.000000e+00  1.000000e+00  0.000000e+00   \n",
       "25%    1.500000e+04  1.450000e+04  0.000000e+00  1.000000e+00  3.000000e+00   \n",
       "50%    6.500000e+04  6.368250e+04  2.035500e+03  1.250000e+00  2.400000e+01   \n",
       "75%    3.240000e+05  3.200000e+05  8.604000e+03  2.030000e+00  1.370000e+04   \n",
       "max    1.000000e+09  1.000000e+09  2.564281e+07  5.000001e+06  1.000000e+09   \n",
       "\n",
       "            次账户还款期数    贷款与已批准贷款比列  总贷款次数与总有效贷款次数比           工作类型  \n",
       "count  1.009940e+05  1.009940e+05   100994.000000  100994.000000  \n",
       "mean   5.790063e+03  1.092924e+03        1.867826       0.484801  \n",
       "std    1.497674e+05  1.604987e+05        0.932107       0.540803  \n",
       "min    0.000000e+00  0.000000e+00        1.000000       0.000000  \n",
       "25%    0.000000e+00  1.000000e+00        1.250000       0.000000  \n",
       "50%    0.000000e+00  1.000000e+00        1.670000       0.000000  \n",
       "75%    0.000000e+00  1.000000e+00        2.000000       1.000000  \n",
       "max    1.980000e+07  5.000000e+07       18.000000       2.000000  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_2.describe().iloc[:,40:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 100994 entries, 1 to 199716\n",
      "Data columns (total 49 columns):\n",
      " #   Column          Non-Null Count   Dtype  \n",
      "---  ------          --------------   -----  \n",
      " 0   客户编号            100994 non-null  int64  \n",
      " 1   已发货款            100994 non-null  int64  \n",
      " 2   资产成本            100994 non-null  int64  \n",
      " 3   贷款与资产比列         100994 non-null  float64\n",
      " 4   品牌              100994 non-null  int64  \n",
      " 5   骑车销售商           100994 non-null  int64  \n",
      " 6   车厂              100994 non-null  int64  \n",
      " 7   出生日期            100994 non-null  int64  \n",
      " 8   货款日期            100994 non-null  int64  \n",
      " 9   地区              100994 non-null  int64  \n",
      " 10  对接员工编号          100994 non-null  int64  \n",
      " 11  是否填写手机号         100994 non-null  int64  \n",
      " 12  受否填写身份证         100994 non-null  int64  \n",
      " 13  是否出具驾驶证         100994 non-null  int64  \n",
      " 14  是否填写护照          100994 non-null  int64  \n",
      " 15  信用评分            100994 non-null  int64  \n",
      " 16  主账户贷款次数         100994 non-null  int64  \n",
      " 17  主账户有效贷款次数       100994 non-null  int64  \n",
      " 18  主账户中尚未还清有效贷款    100994 non-null  int64  \n",
      " 19  主账户中已批准的贷款      100994 non-null  int64  \n",
      " 20  主账户中已发放贷款       100994 non-null  int64  \n",
      " 21  次账户贷款次数         100994 non-null  int64  \n",
      " 22  次账户有效贷款次数       100994 non-null  int64  \n",
      " 23  次账户中尚未还清有效贷款    100994 non-null  int64  \n",
      " 24  次账户中已批准贷款       100994 non-null  int64  \n",
      " 25  次账户中已发放贷款       100994 non-null  int64  \n",
      " 26  主账户每月还款         100994 non-null  int64  \n",
      " 27  次账户没用还款         100994 non-null  int64  \n",
      " 28  近六个月新贷款次数       100994 non-null  int64  \n",
      " 29  近六个月违约次数        100994 non-null  int64  \n",
      " 30  平均贷款期限          100994 non-null  int64  \n",
      " 31  第一次贷款距今时间       100994 non-null  int64  \n",
      " 32  贷款查询次数          100994 non-null  int64  \n",
      " 33  是否违约            100994 non-null  int64  \n",
      " 34  贷款与资产比          100994 non-null  float64\n",
      " 35  贷款总次数           100994 non-null  int64  \n",
      " 36  主账户无效贷款次数       100994 non-null  int64  \n",
      " 37  次账户无效贷款次数       100994 non-null  int64  \n",
      " 38  无效贷款总次数         100994 non-null  int64  \n",
      " 39  尚未还清有效贷款总额      100994 non-null  int64  \n",
      " 40  已批准贷款总额         100994 non-null  int64  \n",
      " 41  已发放贷款总额         100994 non-null  int64  \n",
      " 42  每月还款总额          100994 non-null  int64  \n",
      " 43  贷款与已还贷款比列       100994 non-null  float64\n",
      " 44  主账户还款期数         100994 non-null  int64  \n",
      " 45  次账户还款期数         100994 non-null  int64  \n",
      " 46  贷款与已批准贷款比列      100994 non-null  float64\n",
      " 47  总贷款次数与总有效贷款次数比  100994 non-null  float64\n",
      " 48  工作类型            100994 non-null  int64  \n",
      "dtypes: float64(5), int64(44)\n",
      "memory usage: 38.5 MB\n"
     ]
    }
   ],
   "source": [
    "#查看有无缺失 ： \n",
    "data_2.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "data_2.工作类型.value_counts()/data_2.工作类型.value_counts().sum()\n",
    "# 工作类型2占比2% 很少 直接删除 \n",
    "data_2_worktype = data_2.loc[(data_2.工作类型 != 2)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    98838.000000\n",
       "mean       577.227645\n",
       "std        250.891976\n",
       "min          0.000000\n",
       "25%        471.000000\n",
       "50%        679.000000\n",
       "75%        738.000000\n",
       "max        890.000000\n",
       "Name: 信用评分, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_2_worktype['信用评分'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "data_2_worktype[data_2_worktype.信用评分 == 0].count()/data_2_worktype.count()\n",
    "\n",
    "# 信用评分空值占比 0.9652% 量很少, 删 \n",
    "\n",
    "data_2_worktype_credit = data_2_worktype[data_2_worktype.信用评分 != 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>...</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>519488</td>\n",
       "      <td>56759</td>\n",
       "      <td>65325</td>\n",
       "      <td>89.55</td>\n",
       "      <td>61</td>\n",
       "      <td>22778</td>\n",
       "      <td>86</td>\n",
       "      <td>1967</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>2054139</td>\n",
       "      <td>2036500</td>\n",
       "      <td>2036500</td>\n",
       "      <td>34455</td>\n",
       "      <td>1.00</td>\n",
       "      <td>59</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>648134</td>\n",
       "      <td>72317</td>\n",
       "      <td>99750</td>\n",
       "      <td>73.68</td>\n",
       "      <td>76</td>\n",
       "      <td>17242</td>\n",
       "      <td>48</td>\n",
       "      <td>1995</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>13813</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>13814.00</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>4</td>\n",
       "      <td>458210</td>\n",
       "      <td>50078</td>\n",
       "      <td>65450</td>\n",
       "      <td>79.45</td>\n",
       "      <td>146</td>\n",
       "      <td>14181</td>\n",
       "      <td>45</td>\n",
       "      <td>1974</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>467161</td>\n",
       "      <td>550000</td>\n",
       "      <td>550000</td>\n",
       "      <td>12863</td>\n",
       "      <td>1.18</td>\n",
       "      <td>42</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.06</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>616513</td>\n",
       "      <td>63882</td>\n",
       "      <td>79605</td>\n",
       "      <td>82.91</td>\n",
       "      <td>152</td>\n",
       "      <td>14470</td>\n",
       "      <td>51</td>\n",
       "      <td>1993</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>16225</td>\n",
       "      <td>17700</td>\n",
       "      <td>17700</td>\n",
       "      <td>1475</td>\n",
       "      <td>1.09</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>453368</td>\n",
       "      <td>54013</td>\n",
       "      <td>62371</td>\n",
       "      <td>89.79</td>\n",
       "      <td>34</td>\n",
       "      <td>16556</td>\n",
       "      <td>86</td>\n",
       "      <td>1971</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>12991</td>\n",
       "      <td>100000</td>\n",
       "      <td>100000</td>\n",
       "      <td>3207</td>\n",
       "      <td>7.70</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97879</th>\n",
       "      <td>199707</td>\n",
       "      <td>482490</td>\n",
       "      <td>55413</td>\n",
       "      <td>69086</td>\n",
       "      <td>82.51</td>\n",
       "      <td>67</td>\n",
       "      <td>21308</td>\n",
       "      <td>86</td>\n",
       "      <td>1992</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>400400</td>\n",
       "      <td>403000</td>\n",
       "      <td>403000</td>\n",
       "      <td>4579</td>\n",
       "      <td>1.01</td>\n",
       "      <td>87</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97880</th>\n",
       "      <td>199709</td>\n",
       "      <td>470368</td>\n",
       "      <td>52199</td>\n",
       "      <td>63387</td>\n",
       "      <td>88.35</td>\n",
       "      <td>101</td>\n",
       "      <td>24379</td>\n",
       "      <td>86</td>\n",
       "      <td>1985</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>72033</td>\n",
       "      <td>75000</td>\n",
       "      <td>80288</td>\n",
       "      <td>5354</td>\n",
       "      <td>1.11</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>1.07</td>\n",
       "      <td>1.75</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97881</th>\n",
       "      <td>199711</td>\n",
       "      <td>432468</td>\n",
       "      <td>63447</td>\n",
       "      <td>73701</td>\n",
       "      <td>88.19</td>\n",
       "      <td>13</td>\n",
       "      <td>14614</td>\n",
       "      <td>86</td>\n",
       "      <td>1976</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>128709</td>\n",
       "      <td>214103</td>\n",
       "      <td>214103</td>\n",
       "      <td>354750</td>\n",
       "      <td>1.66</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.44</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97882</th>\n",
       "      <td>199712</td>\n",
       "      <td>436304</td>\n",
       "      <td>36439</td>\n",
       "      <td>60424</td>\n",
       "      <td>62.89</td>\n",
       "      <td>10</td>\n",
       "      <td>23507</td>\n",
       "      <td>45</td>\n",
       "      <td>1986</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>592668</td>\n",
       "      <td>525000</td>\n",
       "      <td>525000</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>525000</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97883</th>\n",
       "      <td>199714</td>\n",
       "      <td>466468</td>\n",
       "      <td>54413</td>\n",
       "      <td>62710</td>\n",
       "      <td>89.30</td>\n",
       "      <td>67</td>\n",
       "      <td>16565</td>\n",
       "      <td>45</td>\n",
       "      <td>1973</td>\n",
       "      <td>2018</td>\n",
       "      <td>...</td>\n",
       "      <td>1185601</td>\n",
       "      <td>1220000</td>\n",
       "      <td>1220000</td>\n",
       "      <td>2500</td>\n",
       "      <td>1.03</td>\n",
       "      <td>487</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>3.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>97884 rows × 50 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        index    客户编号   已发货款   资产成本  贷款与资产比列   品牌  骑车销售商  车厂  出生日期  货款日期  ...  \\\n",
       "0           1  519488  56759  65325    89.55   61  22778  86  1967  2018  ...   \n",
       "1           3  648134  72317  99750    73.68   76  17242  48  1995  2018  ...   \n",
       "2           4  458210  50078  65450    79.45  146  14181  45  1974  2018  ...   \n",
       "3           5  616513  63882  79605    82.91  152  14470  51  1993  2018  ...   \n",
       "4           6  453368  54013  62371    89.79   34  16556  86  1971  2018  ...   \n",
       "...       ...     ...    ...    ...      ...  ...    ...  ..   ...   ...  ...   \n",
       "97879  199707  482490  55413  69086    82.51   67  21308  86  1992  2018  ...   \n",
       "97880  199709  470368  52199  63387    88.35  101  24379  86  1985  2018  ...   \n",
       "97881  199711  432468  63447  73701    88.19   13  14614  86  1976  2018  ...   \n",
       "97882  199712  436304  36439  60424    62.89   10  23507  45  1986  2018  ...   \n",
       "97883  199714  466468  54413  62710    89.30   67  16565  45  1973  2018  ...   \n",
       "\n",
       "       尚未还清有效贷款总额  已批准贷款总额  已发放贷款总额  每月还款总额  贷款与已还贷款比列  主账户还款期数  次账户还款期数  \\\n",
       "0         2054139  2036500  2036500   34455       1.00       59        0   \n",
       "1               0    13813    13813       0   13814.00    13813        0   \n",
       "2          467161   550000   550000   12863       1.18       42        0   \n",
       "3           16225    17700    17700    1475       1.09       11        0   \n",
       "4           12991   100000   100000    3207       7.70       31        0   \n",
       "...           ...      ...      ...     ...        ...      ...      ...   \n",
       "97879      400400   403000   403000    4579       1.01       87        0   \n",
       "97880       72033    75000    80288    5354       1.11       14        0   \n",
       "97881      128709   214103   214103  354750       1.66        0        0   \n",
       "97882      592668   525000   525000       0       1.00   525000        0   \n",
       "97883     1185601  1220000  1220000    2500       1.03      487        0   \n",
       "\n",
       "       贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  \n",
       "0            1.00            1.33     1  \n",
       "1            1.00            2.00     0  \n",
       "2            1.00            1.06     1  \n",
       "3            1.00            1.50     0  \n",
       "4            1.00            1.33     1  \n",
       "...           ...             ...   ...  \n",
       "97879        1.00            2.00     1  \n",
       "97880        1.07            1.75     0  \n",
       "97881        1.00            1.44     1  \n",
       "97882        1.00            3.00     0  \n",
       "97883        1.00            3.00     1  \n",
       "\n",
       "[97884 rows x 50 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_2_worktype_credit.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "data_2_worktype_credit_reloan = data_2_worktype_credit[data_2_worktype_credit.尚未还清有效贷款总额 >= 0 ]\n",
    "data_select = data_2_worktype_credit_reloan[data_2_worktype_credit_reloan.贷款与已还贷款比列 <=200]\n",
    "\n",
    "data_select = data_select.reset_index(drop = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>客户编号</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>534895.085718</td>\n",
       "      <td>6.824365e+04</td>\n",
       "      <td>417428.00000</td>\n",
       "      <td>476322.750000</td>\n",
       "      <td>533961.000000</td>\n",
       "      <td>593452.750000</td>\n",
       "      <td>6.710330e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已发货款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>54495.729039</td>\n",
       "      <td>1.310298e+04</td>\n",
       "      <td>13652.00000</td>\n",
       "      <td>47349.000000</td>\n",
       "      <td>53803.500000</td>\n",
       "      <td>60289.500000</td>\n",
       "      <td>9.873540e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>资产成本</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>74759.211165</td>\n",
       "      <td>1.870832e+04</td>\n",
       "      <td>37000.00000</td>\n",
       "      <td>65319.500000</td>\n",
       "      <td>70131.000000</td>\n",
       "      <td>77532.250000</td>\n",
       "      <td>1.328954e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>75.903667</td>\n",
       "      <td>1.115103e+01</td>\n",
       "      <td>13.50000</td>\n",
       "      <td>70.120000</td>\n",
       "      <td>78.240000</td>\n",
       "      <td>84.530000</td>\n",
       "      <td>9.500000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>品牌</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>71.710409</td>\n",
       "      <td>6.733362e+01</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>63.000000</td>\n",
       "      <td>135.000000</td>\n",
       "      <td>2.610000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>骑车销售商</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>19344.164112</td>\n",
       "      <td>3.518321e+03</td>\n",
       "      <td>10524.00000</td>\n",
       "      <td>16120.000000</td>\n",
       "      <td>18532.000000</td>\n",
       "      <td>22892.000000</td>\n",
       "      <td>2.480300e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>车厂</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>70.842584</td>\n",
       "      <td>2.223694e+01</td>\n",
       "      <td>45.00000</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>1.560000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>出生日期</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>1981.717744</td>\n",
       "      <td>9.418742e+00</td>\n",
       "      <td>1954.00000</td>\n",
       "      <td>1975.000000</td>\n",
       "      <td>1983.000000</td>\n",
       "      <td>1989.000000</td>\n",
       "      <td>1.997000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>货款日期</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2018.00000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2.018000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>地区</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>7.054133</td>\n",
       "      <td>4.291971e+00</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>2.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>对接员工编号</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>1546.237034</td>\n",
       "      <td>9.774775e+02</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>704.000000</td>\n",
       "      <td>1437.000000</td>\n",
       "      <td>2357.000000</td>\n",
       "      <td>3.795000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否填写手机号</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>受否填写身份证</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.025581</td>\n",
       "      <td>1.578832e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否填写护照</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.002789</td>\n",
       "      <td>5.273882e-02</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>信用评分</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>581.163505</td>\n",
       "      <td>2.455436e+02</td>\n",
       "      <td>11.00000</td>\n",
       "      <td>481.000000</td>\n",
       "      <td>679.000000</td>\n",
       "      <td>738.000000</td>\n",
       "      <td>8.900000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户贷款次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>4.984362</td>\n",
       "      <td>6.663093e+00</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>4.530000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户有效贷款次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>2.129897</td>\n",
       "      <td>2.367280e+00</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.440000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中尚未还清有效贷款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>352485.646600</td>\n",
       "      <td>1.371920e+06</td>\n",
       "      <td>-3473.00000</td>\n",
       "      <td>4752.500000</td>\n",
       "      <td>40366.500000</td>\n",
       "      <td>220849.000000</td>\n",
       "      <td>9.652492e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中已批准的贷款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>450104.649687</td>\n",
       "      <td>1.657564e+06</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>14703.000000</td>\n",
       "      <td>67984.500000</td>\n",
       "      <td>335213.000000</td>\n",
       "      <td>1.058657e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中已发放贷款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>449395.714018</td>\n",
       "      <td>1.668141e+06</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>13733.750000</td>\n",
       "      <td>66000.000000</td>\n",
       "      <td>332000.000000</td>\n",
       "      <td>1.057557e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户贷款次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.101292</td>\n",
       "      <td>8.345241e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户有效贷款次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.046510</td>\n",
       "      <td>4.065231e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中尚未还清有效贷款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>9948.593564</td>\n",
       "      <td>2.280848e+05</td>\n",
       "      <td>-155527.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.603285e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中已批准贷款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>13238.928345</td>\n",
       "      <td>2.394704e+05</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.500000e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中已发放贷款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>13027.093585</td>\n",
       "      <td>2.387015e+05</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.500000e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户每月还款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>26433.294733</td>\n",
       "      <td>2.143206e+05</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2101.000000</td>\n",
       "      <td>8740.000000</td>\n",
       "      <td>2.564281e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户没用还款</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>544.681911</td>\n",
       "      <td>1.851970e+04</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.246710e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>近六个月新贷款次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.795414</td>\n",
       "      <td>1.252811e+00</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.500000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>近六个月违约次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.195963</td>\n",
       "      <td>5.270630e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>平均贷款期限</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>15.508942</td>\n",
       "      <td>1.548740e+01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>13.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>1.170000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>第一次贷款距今时间</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>25.825211</td>\n",
       "      <td>2.329152e+01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>8.000000</td>\n",
       "      <td>19.000000</td>\n",
       "      <td>37.000000</td>\n",
       "      <td>1.170000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款查询次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.341562</td>\n",
       "      <td>9.149956e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.800000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否违约</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.164921</td>\n",
       "      <td>3.711110e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与资产比</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.735993</td>\n",
       "      <td>1.103615e-01</td>\n",
       "      <td>0.12413</td>\n",
       "      <td>0.679706</td>\n",
       "      <td>0.756252</td>\n",
       "      <td>0.820601</td>\n",
       "      <td>9.372930e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款总次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>5.085654</td>\n",
       "      <td>6.742037e+00</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>4.530000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户无效贷款次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>2.854465</td>\n",
       "      <td>5.377420e+00</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.510000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户无效贷款次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.054782</td>\n",
       "      <td>5.487672e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>无效贷款总次数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>2.909247</td>\n",
       "      <td>5.418473e+00</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.510000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>362434.240164</td>\n",
       "      <td>1.394011e+06</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>5155.250000</td>\n",
       "      <td>42000.000000</td>\n",
       "      <td>231873.250000</td>\n",
       "      <td>9.652492e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>463343.578032</td>\n",
       "      <td>1.679415e+06</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>15000.000000</td>\n",
       "      <td>70000.000000</td>\n",
       "      <td>350000.000000</td>\n",
       "      <td>1.058657e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>462422.807603</td>\n",
       "      <td>1.689696e+06</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>14490.000000</td>\n",
       "      <td>68860.500000</td>\n",
       "      <td>348400.500000</td>\n",
       "      <td>1.057557e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>每月还款总额</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>26977.976644</td>\n",
       "      <td>2.153531e+05</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2160.000000</td>\n",
       "      <td>8994.000000</td>\n",
       "      <td>2.564281e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>2.599336</td>\n",
       "      <td>7.383551e+00</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.230000</td>\n",
       "      <td>1.920000</td>\n",
       "      <td>1.990000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户还款期数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>93379.705800</td>\n",
       "      <td>5.973408e+05</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>9600.500000</td>\n",
       "      <td>5.326646e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户还款期数</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>5170.498542</td>\n",
       "      <td>1.505532e+05</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.980000e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>1164.677315</td>\n",
       "      <td>1.664141e+05</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>1.876879</td>\n",
       "      <td>9.497835e-01</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.250000</td>\n",
       "      <td>1.670000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.800000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>工作类型</th>\n",
       "      <td>93936.0</td>\n",
       "      <td>0.452127</td>\n",
       "      <td>4.977055e-01</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  count           mean           std           min  \\\n",
       "客户编号            93936.0  534895.085718  6.824365e+04  417428.00000   \n",
       "已发货款            93936.0   54495.729039  1.310298e+04   13652.00000   \n",
       "资产成本            93936.0   74759.211165  1.870832e+04   37000.00000   \n",
       "贷款与资产比列         93936.0      75.903667  1.115103e+01      13.50000   \n",
       "品牌              93936.0      71.710409  6.733362e+01       1.00000   \n",
       "骑车销售商           93936.0   19344.164112  3.518321e+03   10524.00000   \n",
       "车厂              93936.0      70.842584  2.223694e+01      45.00000   \n",
       "出生日期            93936.0    1981.717744  9.418742e+00    1954.00000   \n",
       "货款日期            93936.0    2018.000000  0.000000e+00    2018.00000   \n",
       "地区              93936.0       7.054133  4.291971e+00       1.00000   \n",
       "对接员工编号          93936.0    1546.237034  9.774775e+02       1.00000   \n",
       "是否填写手机号         93936.0       1.000000  0.000000e+00       1.00000   \n",
       "受否填写身份证         93936.0       1.000000  0.000000e+00       1.00000   \n",
       "是否出具驾驶证         93936.0       0.025581  1.578832e-01       0.00000   \n",
       "是否填写护照          93936.0       0.002789  5.273882e-02       0.00000   \n",
       "信用评分            93936.0     581.163505  2.455436e+02      11.00000   \n",
       "主账户贷款次数         93936.0       4.984362  6.663093e+00       1.00000   \n",
       "主账户有效贷款次数       93936.0       2.129897  2.367280e+00       0.00000   \n",
       "主账户中尚未还清有效贷款    93936.0  352485.646600  1.371920e+06   -3473.00000   \n",
       "主账户中已批准的贷款      93936.0  450104.649687  1.657564e+06       0.00000   \n",
       "主账户中已发放贷款       93936.0  449395.714018  1.668141e+06       0.00000   \n",
       "次账户贷款次数         93936.0       0.101292  8.345241e-01       0.00000   \n",
       "次账户有效贷款次数       93936.0       0.046510  4.065231e-01       0.00000   \n",
       "次账户中尚未还清有效贷款    93936.0    9948.593564  2.280848e+05 -155527.00000   \n",
       "次账户中已批准贷款       93936.0   13238.928345  2.394704e+05       0.00000   \n",
       "次账户中已发放贷款       93936.0   13027.093585  2.387015e+05       0.00000   \n",
       "主账户每月还款         93936.0   26433.294733  2.143206e+05       0.00000   \n",
       "次账户没用还款         93936.0     544.681911  1.851970e+04       0.00000   \n",
       "近六个月新贷款次数       93936.0       0.795414  1.252811e+00       0.00000   \n",
       "近六个月违约次数        93936.0       0.195963  5.270630e-01       0.00000   \n",
       "平均贷款期限          93936.0      15.508942  1.548740e+01       0.00000   \n",
       "第一次贷款距今时间       93936.0      25.825211  2.329152e+01       0.00000   \n",
       "贷款查询次数          93936.0       0.341562  9.149956e-01       0.00000   \n",
       "是否违约            93936.0       0.164921  3.711110e-01       0.00000   \n",
       "贷款与资产比          93936.0       0.735993  1.103615e-01       0.12413   \n",
       "贷款总次数           93936.0       5.085654  6.742037e+00       1.00000   \n",
       "主账户无效贷款次数       93936.0       2.854465  5.377420e+00       0.00000   \n",
       "次账户无效贷款次数       93936.0       0.054782  5.487672e-01       0.00000   \n",
       "无效贷款总次数         93936.0       2.909247  5.418473e+00       0.00000   \n",
       "尚未还清有效贷款总额      93936.0  362434.240164  1.394011e+06       0.00000   \n",
       "已批准贷款总额         93936.0  463343.578032  1.679415e+06       0.00000   \n",
       "已发放贷款总额         93936.0  462422.807603  1.689696e+06       0.00000   \n",
       "每月还款总额          93936.0   26977.976644  2.153531e+05       0.00000   \n",
       "贷款与已还贷款比列       93936.0       2.599336  7.383551e+00       1.00000   \n",
       "主账户还款期数         93936.0   93379.705800  5.973408e+05       0.00000   \n",
       "次账户还款期数         93936.0    5170.498542  1.505532e+05       0.00000   \n",
       "贷款与已批准贷款比列      93936.0    1164.677315  1.664141e+05       0.00000   \n",
       "总贷款次数与总有效贷款次数比  93936.0       1.876879  9.497835e-01       1.00000   \n",
       "工作类型            93936.0       0.452127  4.977055e-01       0.00000   \n",
       "\n",
       "                          25%            50%            75%           max  \n",
       "客户编号            476322.750000  533961.000000  593452.750000  6.710330e+05  \n",
       "已发货款             47349.000000   53803.500000   60289.500000  9.873540e+05  \n",
       "资产成本             65319.500000   70131.000000   77532.250000  1.328954e+06  \n",
       "贷款与资产比列             70.120000      78.240000      84.530000  9.500000e+01  \n",
       "品牌                  14.000000      63.000000     135.000000  2.610000e+02  \n",
       "骑车销售商            16120.000000   18532.000000   22892.000000  2.480300e+04  \n",
       "车厂                  48.000000      86.000000      86.000000  1.560000e+02  \n",
       "出生日期              1975.000000    1983.000000    1989.000000  1.997000e+03  \n",
       "货款日期              2018.000000    2018.000000    2018.000000  2.018000e+03  \n",
       "地区                   4.000000       6.000000       9.000000  2.200000e+01  \n",
       "对接员工编号             704.000000    1437.000000    2357.000000  3.795000e+03  \n",
       "是否填写手机号              1.000000       1.000000       1.000000  1.000000e+00  \n",
       "受否填写身份证              1.000000       1.000000       1.000000  1.000000e+00  \n",
       "是否出具驾驶证              0.000000       0.000000       0.000000  1.000000e+00  \n",
       "是否填写护照               0.000000       0.000000       0.000000  1.000000e+00  \n",
       "信用评分               481.000000     679.000000     738.000000  8.900000e+02  \n",
       "主账户贷款次数              1.000000       3.000000       6.000000  4.530000e+02  \n",
       "主账户有效贷款次数            1.000000       1.000000       3.000000  1.440000e+02  \n",
       "主账户中尚未还清有效贷款      4752.500000   40366.500000  220849.000000  9.652492e+07  \n",
       "主账户中已批准的贷款       14703.000000   67984.500000  335213.000000  1.058657e+08  \n",
       "主账户中已发放贷款        13733.750000   66000.000000  332000.000000  1.057557e+08  \n",
       "次账户贷款次数              0.000000       0.000000       0.000000  5.200000e+01  \n",
       "次账户有效贷款次数            0.000000       0.000000       0.000000  2.200000e+01  \n",
       "次账户中尚未还清有效贷款         0.000000       0.000000       0.000000  3.603285e+07  \n",
       "次账户中已批准贷款            0.000000       0.000000       0.000000  2.500000e+07  \n",
       "次账户中已发放贷款            0.000000       0.000000       0.000000  2.500000e+07  \n",
       "主账户每月还款              0.000000    2101.000000    8740.000000  2.564281e+07  \n",
       "次账户没用还款              0.000000       0.000000       0.000000  3.246710e+06  \n",
       "近六个月新贷款次数            0.000000       0.000000       1.000000  3.500000e+01  \n",
       "近六个月违约次数             0.000000       0.000000       0.000000  2.000000e+01  \n",
       "平均贷款期限               5.000000      13.000000      20.000000  1.170000e+02  \n",
       "第一次贷款距今时间            8.000000      19.000000      37.000000  1.170000e+02  \n",
       "贷款查询次数               0.000000       0.000000       0.000000  2.800000e+01  \n",
       "是否违约                 0.000000       0.000000       0.000000  1.000000e+00  \n",
       "贷款与资产比               0.679706       0.756252       0.820601  9.372930e-01  \n",
       "贷款总次数                1.000000       3.000000       6.000000  4.530000e+02  \n",
       "主账户无效贷款次数            0.000000       1.000000       3.000000  4.510000e+02  \n",
       "次账户无效贷款次数            0.000000       0.000000       0.000000  4.200000e+01  \n",
       "无效贷款总次数              0.000000       1.000000       3.000000  4.510000e+02  \n",
       "尚未还清有效贷款总额        5155.250000   42000.000000  231873.250000  9.652492e+07  \n",
       "已批准贷款总额          15000.000000   70000.000000  350000.000000  1.058657e+08  \n",
       "已发放贷款总额          14490.000000   68860.500000  348400.500000  1.057557e+08  \n",
       "每月还款总额               0.000000    2160.000000    8994.000000  2.564281e+07  \n",
       "贷款与已还贷款比列            1.000000       1.230000       1.920000  1.990000e+02  \n",
       "主账户还款期数              3.000000      23.000000    9600.500000  5.326646e+07  \n",
       "次账户还款期数              0.000000       0.000000       0.000000  1.980000e+07  \n",
       "贷款与已批准贷款比列           1.000000       1.000000       1.000000  5.000000e+07  \n",
       "总贷款次数与总有效贷款次数比       1.250000       1.670000       2.000000  1.800000e+01  \n",
       "工作类型                 0.000000       0.000000       1.000000  1.000000e+00  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_select.describe().T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "##  衍生字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "hidden": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# 年龄分段\n",
    "\n",
    "def age_trans(df): \n",
    "    ages = df['货款日期'] - df['出生日期']\n",
    "    age  = []\n",
    "    # 大学生段\n",
    "    for i in range(len(ages)): \n",
    "        if ages[i] <= 23 :  \n",
    "            age.append(1) \n",
    "        #刚工作\n",
    "        elif ages[i] <= 26:\n",
    "            age.append(2) \n",
    "        #稳定期\n",
    "        elif ages[i] <= 30:\n",
    "            age.append(3)\n",
    "\n",
    "        elif ages[i] <= 40:\n",
    "            age.append(4)\n",
    "\n",
    "        elif ages[i] <= 50:\n",
    "            age.append(5)\n",
    "\n",
    "        else:\n",
    "            age.append(6)\n",
    "    agex = pd.DataFrame(age)\n",
    "    return agex \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "train_set = pd.DataFrame()\n",
    "train_set['是否违约'] = data_select['是否违约']\n",
    "train_set['信用评分'] = data_select['信用评分']\n",
    "train_set['工作类型'] = data_select['工作类型']\n",
    "train_set['是否出具驾驶证'] = data_select['是否出具驾驶证']\n",
    "train_set['是否填写护照'] = data_select['是否填写护照']\n",
    "train_set['年龄区间'] = age_trans(data_select)\n",
    "train_set['总贷款次数'] = data_select['主账户贷款次数']+data_select['次账户贷款次数']\n",
    "train_set['总有效贷款次数'] = data_select['主账户有效贷款次数']+data_select['次账户有效贷款次数']\n",
    "train_set['贷款成功率'] = round(train_set['总有效贷款次数']/train_set['总贷款次数'],4)\n",
    "train_set['贷款与资产比'] =data_select['贷款与资产比']\n",
    "train_set['总还款期数'] = data_select['主账户还款期数']+data_select['次账户还款期数']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>是否违约</th>\n",
       "      <th>信用评分</th>\n",
       "      <th>工作类型</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>年龄区间</th>\n",
       "      <th>总贷款次数</th>\n",
       "      <th>总有效贷款次数</th>\n",
       "      <th>贷款成功率</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>总还款期数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>300</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.868871</td>\n",
       "      <td>59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>379</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0625</td>\n",
       "      <td>0.765134</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>749</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.5000</td>\n",
       "      <td>0.802487</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>300</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.865995</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>392</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.822649</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>589</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.870320</td>\n",
       "      <td>69516</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>663</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.738645</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>726</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0.1667</td>\n",
       "      <td>0.774071</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>660</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.6667</td>\n",
       "      <td>0.732499</td>\n",
       "      <td>679</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "      <td>725</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.827466</td>\n",
       "      <td>191689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1</td>\n",
       "      <td>706</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.866662</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1</td>\n",
       "      <td>825</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.881791</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1</td>\n",
       "      <td>496</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>0.7500</td>\n",
       "      <td>0.860756</td>\n",
       "      <td>50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1</td>\n",
       "      <td>672</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>25</td>\n",
       "      <td>3</td>\n",
       "      <td>0.1200</td>\n",
       "      <td>0.736619</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.830160</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1</td>\n",
       "      <td>836</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.872813</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1</td>\n",
       "      <td>18</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0000</td>\n",
       "      <td>0.717786</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.767190</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1</td>\n",
       "      <td>726</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0.6667</td>\n",
       "      <td>0.734571</td>\n",
       "      <td>57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1</td>\n",
       "      <td>738</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.628355</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    是否违约  信用评分  工作类型  是否出具驾驶证  是否填写护照  年龄区间  总贷款次数  总有效贷款次数   贷款成功率    贷款与资产比  \\\n",
       "0      1   300     1        0       0     6      7        2  0.2857  0.868871   \n",
       "1      1   379     1        0       0     5     16        1  0.0625  0.765134   \n",
       "2      1   749     0        0       0     2      2        1  0.5000  0.802487   \n",
       "3      1   300     1        0       0     5      3        1  0.3333  0.865995   \n",
       "4      1   392     1        0       0     5     11        3  0.2727  0.822649   \n",
       "5      1   589     0        0       0     5      3        3  1.0000  0.870320   \n",
       "6      1   663     0        0       0     4      1        1  1.0000  0.738645   \n",
       "7      1   726     1        0       0     6      6        1  0.1667  0.774071   \n",
       "8      1   660     0        0       0     4      3        2  0.6667  0.732499   \n",
       "9      1   725     1        0       0     2      4        4  1.0000  0.827466   \n",
       "10     1   706     0        0       0     6      9        3  0.3333  0.866662   \n",
       "11     1   825     0        0       0     6      1        0  0.0000  0.881791   \n",
       "12     1   496     1        0       0     5      4        3  0.7500  0.860756   \n",
       "13     1   672     1        0       0     4     25        3  0.1200  0.736619   \n",
       "14     1    15     1        0       0     2      1        0  0.0000  0.830160   \n",
       "15     1   836     0        0       0     5      3        0  0.0000  0.872813   \n",
       "16     1    18     1        0       0     5      2        0  0.0000  0.717786   \n",
       "17     1    15     1        0       0     6      1        1  1.0000  0.767190   \n",
       "18     1   726     1        0       0     4      3        2  0.6667  0.734571   \n",
       "19     1   738     0        0       0     2      1        1  1.0000  0.628355   \n",
       "\n",
       "     总还款期数  \n",
       "0       59  \n",
       "1       42  \n",
       "2       11  \n",
       "3       31  \n",
       "4       45  \n",
       "5    69516  \n",
       "6        0  \n",
       "7       14  \n",
       "8      679  \n",
       "9   191689  \n",
       "10      13  \n",
       "11       0  \n",
       "12      50  \n",
       "13       4  \n",
       "14       0  \n",
       "15       0  \n",
       "16       0  \n",
       "17       5  \n",
       "18      57  \n",
       "19      12  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_set.head(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## NearMiss处理数据不平衡问题"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>信用评分</th>\n",
       "      <th>工作类型</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>年龄区间</th>\n",
       "      <th>总贷款次数</th>\n",
       "      <th>总有效贷款次数</th>\n",
       "      <th>贷款成功率</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>总还款期数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>300</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0.2857</td>\n",
       "      <td>0.868871</td>\n",
       "      <td>59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>379</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0625</td>\n",
       "      <td>0.765134</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>749</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0.5000</td>\n",
       "      <td>0.802487</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>300</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0.3333</td>\n",
       "      <td>0.865995</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>392</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>0.2727</td>\n",
       "      <td>0.822649</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93931</th>\n",
       "      <td>771</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.802087</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93932</th>\n",
       "      <td>300</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>0.5000</td>\n",
       "      <td>0.823497</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93933</th>\n",
       "      <td>726</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>22</td>\n",
       "      <td>7</td>\n",
       "      <td>0.3182</td>\n",
       "      <td>0.860870</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93934</th>\n",
       "      <td>753</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.603055</td>\n",
       "      <td>525000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>93935</th>\n",
       "      <td>771</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0000</td>\n",
       "      <td>0.867693</td>\n",
       "      <td>487</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>93936 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       信用评分  工作类型  是否出具驾驶证  是否填写护照  年龄区间  总贷款次数  总有效贷款次数   贷款成功率    贷款与资产比  \\\n",
       "0       300     1        0       0     6      7        2  0.2857  0.868871   \n",
       "1       379     1        0       0     5     16        1  0.0625  0.765134   \n",
       "2       749     0        0       0     2      2        1  0.5000  0.802487   \n",
       "3       300     1        0       0     5      3        1  0.3333  0.865995   \n",
       "4       392     1        0       0     5     11        3  0.2727  0.822649   \n",
       "...     ...   ...      ...     ...   ...    ...      ...     ...       ...   \n",
       "93931   771     1        0       0     2      1        1  1.0000  0.802087   \n",
       "93932   300     0        0       0     4      6        3  0.5000  0.823497   \n",
       "93933   726     1        0       0     5     22        7  0.3182  0.860870   \n",
       "93934   753     0        0       0     4      2        2  1.0000  0.603055   \n",
       "93935   771     1        0       0     5      2        2  1.0000  0.867693   \n",
       "\n",
       "        总还款期数  \n",
       "0          59  \n",
       "1          42  \n",
       "2          11  \n",
       "3          31  \n",
       "4          45  \n",
       "...       ...  \n",
       "93931      87  \n",
       "93932      14  \n",
       "93933       0  \n",
       "93934  525000  \n",
       "93935     487  \n",
       "\n",
       "[93936 rows x 10 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = train_set.drop('是否违约',axis=1)\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>信用评分</th>\n",
       "      <th>工作类型</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>年龄区间</th>\n",
       "      <th>总贷款次数</th>\n",
       "      <th>总有效贷款次数</th>\n",
       "      <th>贷款成功率</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>总还款期数</th>\n",
       "      <th>是否违约</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>825</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.825406</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>825</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.835515</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>825</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.825023</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>825</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.824834</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>825</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.877162</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   信用评分  工作类型  是否出具驾驶证  是否填写护照  年龄区间  总贷款次数  总有效贷款次数  贷款成功率    贷款与资产比  总还款期数  \\\n",
       "0   825     1        0       0     4      1        0    0.0  0.825406      0   \n",
       "1   825     0        0       0     4      2        0    0.0  0.835515      0   \n",
       "2   825     1        0       0     4      1        0    0.0  0.825023      0   \n",
       "3   825     1        0       0     4      1        0    0.0  0.824834      0   \n",
       "4   825     0        0       0     4      1        0    0.0  0.877162      0   \n",
       "\n",
       "   是否违约  \n",
       "0     0  \n",
       "1     0  \n",
       "2     0  \n",
       "3     0  \n",
       "4     0  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from imblearn.under_sampling  import NearMiss#调用NearMiss解决数据不平衡问题\n",
    "NM =NearMiss(version=1) # 设置为1，减少过拟合问题\n",
    "X_resampled, Y_resampled = NM.fit_resample(X, train_set['是否违约'])\n",
    "train_set_miss =pd.concat([X_resampled,Y_resampled],axis=1)\n",
    "train_set_miss.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>信用评分</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>555.747160</td>\n",
       "      <td>296.816945</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>300.000000</td>\n",
       "      <td>681.000000</td>\n",
       "      <td>825.00000</td>\n",
       "      <td>8.790000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>工作类型</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>0.438484</td>\n",
       "      <td>0.496209</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>0.015653</td>\n",
       "      <td>0.124132</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否填写护照</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>0.001162</td>\n",
       "      <td>0.034067</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.00000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>年龄区间</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>3.696844</td>\n",
       "      <td>1.334090</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>5.00000</td>\n",
       "      <td>6.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>总贷款次数</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>3.109476</td>\n",
       "      <td>5.349945</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.00000</td>\n",
       "      <td>4.530000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>总有效贷款次数</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>1.164440</td>\n",
       "      <td>1.722860</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>3.400000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款成功率</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>0.404097</td>\n",
       "      <td>0.420026</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.300000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与资产比</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>0.744563</td>\n",
       "      <td>0.105456</td>\n",
       "      <td>0.169757</td>\n",
       "      <td>0.693861</td>\n",
       "      <td>0.764853</td>\n",
       "      <td>0.82414</td>\n",
       "      <td>9.348990e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>总还款期数</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>35888.136393</td>\n",
       "      <td>384319.585905</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>19.00000</td>\n",
       "      <td>4.354805e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否违约</th>\n",
       "      <td>30984.0</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.500008</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>1.00000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           count          mean            std        min         25%  \\\n",
       "信用评分     30984.0    555.747160     296.816945  14.000000  300.000000   \n",
       "工作类型     30984.0      0.438484       0.496209   0.000000    0.000000   \n",
       "是否出具驾驶证  30984.0      0.015653       0.124132   0.000000    0.000000   \n",
       "是否填写护照   30984.0      0.001162       0.034067   0.000000    0.000000   \n",
       "年龄区间     30984.0      3.696844       1.334090   1.000000    3.000000   \n",
       "总贷款次数    30984.0      3.109476       5.349945   1.000000    1.000000   \n",
       "总有效贷款次数  30984.0      1.164440       1.722860   0.000000    0.000000   \n",
       "贷款成功率    30984.0      0.404097       0.420026   0.000000    0.000000   \n",
       "贷款与资产比   30984.0      0.744563       0.105456   0.169757    0.693861   \n",
       "总还款期数    30984.0  35888.136393  384319.585905   0.000000    0.000000   \n",
       "是否违约     30984.0      0.500000       0.500008   0.000000    0.000000   \n",
       "\n",
       "                50%        75%           max  \n",
       "信用评分     681.000000  825.00000  8.790000e+02  \n",
       "工作类型       0.000000    1.00000  1.000000e+00  \n",
       "是否出具驾驶证    0.000000    0.00000  1.000000e+00  \n",
       "是否填写护照     0.000000    0.00000  1.000000e+00  \n",
       "年龄区间       4.000000    5.00000  6.000000e+00  \n",
       "总贷款次数      1.000000    3.00000  4.530000e+02  \n",
       "总有效贷款次数    1.000000    1.00000  3.400000e+01  \n",
       "贷款成功率      0.300000    1.00000  1.000000e+00  \n",
       "贷款与资产比     0.764853    0.82414  9.348990e-01  \n",
       "总还款期数      3.000000   19.00000  4.354805e+07  \n",
       "是否违约       0.500000    1.00000  1.000000e+00  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_set_miss.describe().T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#  备选模型比较"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模型初比较"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "# 模型处理模块\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "# 标准化处理模块\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "# 常规模型\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "# 集成学习和stacking模型\n",
    "from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier\n",
    "import xgboost as xgb\n",
    "from xgboost.sklearn import XGBClassifier\n",
    "from mlxtend.classifier import StackingClassifier\n",
    "# 评价标准模块\n",
    "from sklearn import metrics\n",
    "from sklearn.metrics import accuracy_score,roc_auc_score,recall_score,precision_score\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = train_set_miss.drop('是否违约',1) \n",
    "Y = train_set_miss['是否违约']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train,X_test,y_train,y_test = train_test_split(X,\n",
    "                                                 Y,test_size=0.3,random_state=420)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_model(X_train, y_train, X_test, y_test,\n",
    "               model,model_name):\n",
    "    \n",
    "    print('训练{}'.format(model_name))\n",
    "    \n",
    "    clf=model\n",
    "    start = time.time()\n",
    "    clf.fit(X_train, y_train.values.ravel())\n",
    "    \n",
    "     #验证模型\n",
    "    print('训练准确率：{:.4f}'.format(clf.score(X_train, y_train)))\n",
    "    \n",
    "    \n",
    "    predict=clf.predict(X_test)\n",
    "    score = clf.score(X_test, y_test)\n",
    "    precision=precision_score(y_test,predict)\n",
    "    recall=recall_score(y_test,predict)\n",
    "    print('测试准确率：{:.4f}'.format(score))\n",
    "    print('测试精确率：{:.4f}'.format(precision))\n",
    "    print('测试召回率：{:.4f}'.format(recall))\n",
    "    \n",
    "    end = time.time()\n",
    "    duration = end - start\n",
    "    print('模型训练耗时：{:6f}s'.format(duration))\n",
    "    \n",
    "    \n",
    "    return clf, score,precision,recall, duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练LR\n",
      "训练准确率：0.7955\n",
      "测试准确率：0.7949\n",
      "测试精确率：0.8457\n",
      "测试召回率：0.7122\n",
      "模型训练耗时：0.181513s\n",
      "训练DT\n",
      "训练准确率：0.8626\n",
      "测试准确率：0.8330\n",
      "测试精确率：0.9073\n",
      "测试召回率：0.7352\n",
      "模型训练耗时：0.070813s\n",
      "训练AdaBoost\n",
      "训练准确率：0.8391\n",
      "测试准确率：0.8354\n",
      "测试精确率：0.8761\n",
      "测试召回率：0.7744\n",
      "模型训练耗时：0.827784s\n",
      "训练GBDT\n",
      "训练准确率：0.8503\n",
      "测试准确率：0.8446\n",
      "测试精确率：0.9073\n",
      "测试召回率：0.7612\n",
      "模型训练耗时：2.163216s\n",
      "训练RF\n",
      "训练准确率：0.9998\n",
      "测试准确率：0.8095\n",
      "测试精确率：0.8243\n",
      "测试召回率：0.7779\n",
      "模型训练耗时：2.832421s\n",
      "训练XGBoost\n",
      "训练准确率：0.8858\n",
      "测试准确率：0.8484\n",
      "测试精确率：0.9078\n",
      "测试召回率：0.7696\n",
      "模型训练耗时：1.133957s\n"
     ]
    }
   ],
   "source": [
    "model_name_param_dict = {    'LR': (LogisticRegression(penalty =\"l2\")),\n",
    "                             'DT': (DecisionTreeClassifier(max_depth=10,min_samples_split=10)),\n",
    "                             'AdaBoost': (AdaBoostClassifier()),\n",
    "                             'GBDT': (GradientBoostingClassifier()),\n",
    "                             'RF': (RandomForestClassifier()),\n",
    "                             'XGBoost' :(XGBClassifier())\n",
    "                           \n",
    "                         }\n",
    "\n",
    "result_df = pd.DataFrame(columns=['Accuracy (%)','precision(%)','recall(%)','Time (s)'],\n",
    "                             index=list(model_name_param_dict.keys()))\n",
    "\n",
    "for model_name, model in model_name_param_dict.items():\n",
    "    clf, acc,pre,recall, mean_duration = train_model(X_train, y_train,\n",
    "                                                        X_test, y_test,\n",
    "                                                        model,model_name)\n",
    "    result_df.loc[model_name, 'Accuracy (%)'] = acc\n",
    "    result_df.loc[model_name, 'precision(%)'] = pre\n",
    "    result_df.loc[model_name, 'recall(%)'] = recall\n",
    "    result_df.loc[model_name, 'Time (s)'] = mean_duration \n",
    "\n",
    "result_df.to_csv(os.path.join('model_comparison.csv'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 数据归一化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>信用评分</th>\n",
       "      <th>工作类型</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>年龄区间</th>\n",
       "      <th>总贷款次数</th>\n",
       "      <th>总有效贷款次数</th>\n",
       "      <th>贷款成功率</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>总还款期数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.907149</td>\n",
       "      <td>1.131628</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>-0.394305</td>\n",
       "      <td>-0.675887</td>\n",
       "      <td>-0.962093</td>\n",
       "      <td>0.766616</td>\n",
       "      <td>-0.093382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.907149</td>\n",
       "      <td>-0.883682</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>-0.207384</td>\n",
       "      <td>-0.675887</td>\n",
       "      <td>-0.962093</td>\n",
       "      <td>0.862473</td>\n",
       "      <td>-0.093382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.907149</td>\n",
       "      <td>1.131628</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>-0.394305</td>\n",
       "      <td>-0.675887</td>\n",
       "      <td>-0.962093</td>\n",
       "      <td>0.762982</td>\n",
       "      <td>-0.093382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.907149</td>\n",
       "      <td>1.131628</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>-0.394305</td>\n",
       "      <td>-0.675887</td>\n",
       "      <td>-0.962093</td>\n",
       "      <td>0.761194</td>\n",
       "      <td>-0.093382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.907149</td>\n",
       "      <td>-0.883682</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>-0.394305</td>\n",
       "      <td>-0.675887</td>\n",
       "      <td>-0.962093</td>\n",
       "      <td>1.257404</td>\n",
       "      <td>-0.093382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30979</th>\n",
       "      <td>-0.750465</td>\n",
       "      <td>-0.883682</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>1.101062</td>\n",
       "      <td>-0.095447</td>\n",
       "      <td>-0.697581</td>\n",
       "      <td>-0.535280</td>\n",
       "      <td>-0.093372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30980</th>\n",
       "      <td>-0.662868</td>\n",
       "      <td>1.131628</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.976829</td>\n",
       "      <td>0.166458</td>\n",
       "      <td>0.484992</td>\n",
       "      <td>0.228330</td>\n",
       "      <td>-0.593570</td>\n",
       "      <td>-0.093380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30981</th>\n",
       "      <td>0.610665</td>\n",
       "      <td>1.131628</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>-0.394305</td>\n",
       "      <td>-0.095447</td>\n",
       "      <td>1.418753</td>\n",
       "      <td>1.333019</td>\n",
       "      <td>-0.093320</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30982</th>\n",
       "      <td>-0.029470</td>\n",
       "      <td>-0.883682</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.227242</td>\n",
       "      <td>2.783349</td>\n",
       "      <td>3.967630</td>\n",
       "      <td>0.095955</td>\n",
       "      <td>0.080075</td>\n",
       "      <td>-0.093323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30983</th>\n",
       "      <td>-0.467458</td>\n",
       "      <td>-0.883682</td>\n",
       "      <td>-0.126104</td>\n",
       "      <td>-0.034106</td>\n",
       "      <td>0.976829</td>\n",
       "      <td>2.596428</td>\n",
       "      <td>0.484992</td>\n",
       "      <td>-0.682105</td>\n",
       "      <td>0.707408</td>\n",
       "      <td>-0.093375</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30984 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           信用评分      工作类型   是否出具驾驶证    是否填写护照      年龄区间     总贷款次数   总有效贷款次数  \\\n",
       "0      0.907149  1.131628 -0.126104 -0.034106  0.227242 -0.394305 -0.675887   \n",
       "1      0.907149 -0.883682 -0.126104 -0.034106  0.227242 -0.207384 -0.675887   \n",
       "2      0.907149  1.131628 -0.126104 -0.034106  0.227242 -0.394305 -0.675887   \n",
       "3      0.907149  1.131628 -0.126104 -0.034106  0.227242 -0.394305 -0.675887   \n",
       "4      0.907149 -0.883682 -0.126104 -0.034106  0.227242 -0.394305 -0.675887   \n",
       "...         ...       ...       ...       ...       ...       ...       ...   \n",
       "30979 -0.750465 -0.883682 -0.126104 -0.034106  0.227242  1.101062 -0.095447   \n",
       "30980 -0.662868  1.131628 -0.126104 -0.034106  0.976829  0.166458  0.484992   \n",
       "30981  0.610665  1.131628 -0.126104 -0.034106  0.227242 -0.394305 -0.095447   \n",
       "30982 -0.029470 -0.883682 -0.126104 -0.034106  0.227242  2.783349  3.967630   \n",
       "30983 -0.467458 -0.883682 -0.126104 -0.034106  0.976829  2.596428  0.484992   \n",
       "\n",
       "          贷款成功率    贷款与资产比     总还款期数  \n",
       "0     -0.962093  0.766616 -0.093382  \n",
       "1     -0.962093  0.862473 -0.093382  \n",
       "2     -0.962093  0.762982 -0.093382  \n",
       "3     -0.962093  0.761194 -0.093382  \n",
       "4     -0.962093  1.257404 -0.093382  \n",
       "...         ...       ...       ...  \n",
       "30979 -0.697581 -0.535280 -0.093372  \n",
       "30980  0.228330 -0.593570 -0.093380  \n",
       "30981  1.418753  1.333019 -0.093320  \n",
       "30982  0.095955  0.080075 -0.093323  \n",
       "30983 -0.682105  0.707408 -0.093375  \n",
       "\n",
       "[30984 rows x 10 columns]"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 归一化\n",
    "from sklearn.preprocessing import StandardScaler \n",
    "\n",
    "scaler = StandardScaler() \n",
    "X_std = pd.DataFrame(scaler.fit_transform(X),columns = X.columns)\n",
    "X_std\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "hidden": true
   },
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "X_train,X_test,y_train,y_test = train_test_split(X_std,\n",
    "                                                 Y,test_size=0.3,random_state=420)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练LR\n",
      "训练准确率：0.7998\n",
      "测试准确率：0.7987\n",
      "测试精确率：0.8656\n",
      "测试召回率：0.6986\n",
      "模型训练耗时：0.053855s\n",
      "训练DT\n",
      "训练准确率：0.8628\n",
      "测试准确率：0.8328\n",
      "测试精确率：0.9074\n",
      "测试召回率：0.7345\n",
      "模型训练耗时：0.068815s\n",
      "训练AdaBoost\n",
      "训练准确率：0.8391\n",
      "测试准确率：0.8354\n",
      "测试精确率：0.8761\n",
      "测试召回率：0.7744\n",
      "模型训练耗时：0.829810s\n",
      "训练GBDT\n",
      "训练准确率：0.8503\n",
      "测试准确率：0.8446\n",
      "测试精确率：0.9073\n",
      "测试召回率：0.7612\n",
      "模型训练耗时：2.126308s\n",
      "训练RF\n",
      "训练准确率：0.9998\n",
      "测试准确率：0.8073\n",
      "测试精确率：0.8244\n",
      "测试召回率：0.7722\n",
      "模型训练耗时：2.823424s\n",
      "训练XGBoost\n",
      "训练准确率：0.8858\n",
      "测试准确率：0.8484\n",
      "测试精确率：0.9078\n",
      "测试召回率：0.7696\n",
      "模型训练耗时：1.113022s\n"
     ]
    }
   ],
   "source": [
    "model_name_param_dict = {    'LR': (LogisticRegression(penalty =\"l2\")),\n",
    "                             'DT': (DecisionTreeClassifier(max_depth=10,min_samples_split=10)),\n",
    "                             'AdaBoost': (AdaBoostClassifier()),\n",
    "                             'GBDT': (GradientBoostingClassifier()),\n",
    "                             'RF': (RandomForestClassifier()),\n",
    "                             'XGBoost' :(XGBClassifier())\n",
    "                           \n",
    "                         }\n",
    "\n",
    "result_df = pd.DataFrame(columns=['Accuracy (%)','precision(%)','recall(%)','Time (s)'],\n",
    "                             index=list(model_name_param_dict.keys()))\n",
    "\n",
    "for model_name, model in model_name_param_dict.items():\n",
    "    clf, acc,pre,recall, mean_duration = train_model(X_train, y_train,\n",
    "                                                        X_test, y_test,\n",
    "                                                        model,model_name)\n",
    "    result_df.loc[model_name, 'Accuracy (%)'] = acc\n",
    "    result_df.loc[model_name, 'precision(%)'] = pre\n",
    "    result_df.loc[model_name, 'recall(%)'] = recall\n",
    "    result_df.loc[model_name, 'Time (s)'] = mean_duration \n",
    "\n",
    "result_df.to_csv(os.path.join('model_comparison.csv'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "综合考虑选用RF模型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 网格搜索调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "randomforest = RandomForestClassifier(n_jobs=-1)\n",
    "\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV \n",
    "\n",
    "p = {\n",
    "    \n",
    "    'max_depth': range(5,20),\n",
    "    'n_estimators' : range(10,50,5)\n",
    "    \n",
    "    \n",
    "}\n",
    "GS = GridSearchCV(randomforest,p,cv =5 )\n",
    "params = GS.fit(X_train ,y_train)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': 12, 'n_estimators': 45}"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "params.best_params_#最佳参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "randomforest_best = RandomForestClassifier(n_jobs=-1,max_depth=10,n_estimators =35)#实例化对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(max_depth=10, n_estimators=35, n_jobs=-1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(max_depth=10, n_estimators=35, n_jobs=-1)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "RandomForestClassifier(max_depth=10, n_estimators=35, n_jobs=-1)"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "randomforest_best.fit(X_train,y_train)#训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8434810671256454"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "randomforest_best.score(X_test,y_test)#训练模型得分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9144456289978679 0.7515881708652793\n"
     ]
    }
   ],
   "source": [
    "predict=randomforest_best.predict(X_test)#计算精准率和召回率\n",
    "print(precision_score(y_test,predict),recall_score(y_test,predict))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 优质模型保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib\n",
    " \n",
    "#保存模型\n",
    "def save_model(model, filepath):\n",
    "\n",
    "    joblib.dump(model, filename=filepath)\n",
    "\n",
    "def load_model(filepath):\n",
    "    model = joblib.load(filepath)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "save_model(randomforest_best,filepath='C:/Users/86180/数据分析师训练营/作业资料下载/Untitled.ipynb')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "426.667px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
